/**********************************************************************/
/*
   Author: Karan Makkar
   Created: October 2023
   Updated: Aug 2025, by Youssef Assarssah
   Description: Prakerja Program and Training Course Take-up in Admin Data
   vs National Surveys report applied/selected. 
   Note: Use "get cash before survey" rather than "win before survey" as endogenous var
   Output: TableA15
*/
/**********************************************************************/

/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

* Load the project filepath globals, unless a master script has already
* flagged (via $master_run == "1") that it is driving this run
  if "${master_run}" != "1" {
    include "./Do/SET_FILEPATHS.do"
  }

* Start from a clean session
  clear all
  set more off
  set matsize 11000

* Log: close any log left open, then open a text log whose name is
* prefixed with today's date
  capture log close
  global prefix: display %tdCYND td(`c(current_date)')
  log using "$KP_logs/${prefix}_report_win_rates_gc.txt", text replace

* Declare filepaths
* For every wave, global <wave> points at the survey-only file and global
* <wave>_m at the same wave merged with admin data.

  * Susenas waves
  foreach wave in sep20 mar21 sep21 mar22 {
    global `wave'   "$KP_deid_susenas/Clean/sus_`wave'_deid_clean.dta"
    global `wave'_m "$KP_deid_susenas/Clean/sus_`wave'_deid_clean_merged.dta"
  }

  * Sakernas waves
  foreach wave in aug20 feb21 aug21 {
    global `wave'   "$KP_deid_sakernas/Clean/sak_`wave'_deid_clean.dta"
    global `wave'_m "$KP_deid_sakernas/Clean/sak_`wave'_deid_clean_merged.dta"
  }

  * SAMPLE RESTRICTIONS: restrict to person-batch observations before date surveyed
  * Every restriction is wrapped in its own parentheses. Stata evaluates & before |,
  * so an unparenthesised chain of | followed by "& extra_condition" would apply the
  * extra condition to the last disjunct only.
  global sak_aug20_precomma (inrange(batch, 2, 3) & sak_round == 5)
  global sak_feb21_precomma (inrange(batch, 2, 11) & sak_round == 6)
  global sak_aug21_precomma (inrange(batch, 2, 17) & sak_round == 7)
  global sus_sep20_precomma (inrange(batch, 2, 5) & sus_round == 5)
  global sus_mar21_precomma (inrange(batch, 2, 11) & sus_round == 6)
  global sus_sep21_precomma (inrange(batch, 2, 18) & sus_round == 7)
  global sus_mar22_precomma (inrange(batch, 2, 22) & sus_round == 8)

  * Stacked samples are the union of the per-wave restrictions above
  global sak_stack_precomma (${sak_aug20_precomma} | ${sak_feb21_precomma} | ${sak_aug21_precomma})
  global sus_stack_precomma (${sus_sep20_precomma} | ${sus_mar21_precomma} | ${sus_sep21_precomma} | ${sus_mar22_precomma})



* SE CLUSTERING (OLS): Individual level
* The same clustering variable in the two option spellings used further down
  global vce vce(cluster anon_id4)
  global cluster cluster(anon_id4)

* declare list to keep for first stage regs
* The ever_win_<batch> indicators are collected first, then spliced into the list
    local ever_win_vars
    foreach b of numlist 3 5 11 17 18 22 39 {
        local ever_win_vars `ever_win_vars' ever_win_`b'
    }

    global keep_list anon_id4 strata gender female batch urban ///
                    school_years education age multi_apply_hh ///
                    `ever_win_vars' ///
                    win_in_batch stratum_win_prob

/*----------------------------------------------------*/
* Section:  First Stage Regressions
/*----------------------------------------------------*/

* load sakernas: stack the three admin-merged waves
    use "${aug20_m}", clear
    append using "${feb21_m}" "${aug21_m}"

    * Give every row of a person that person's latest incentive date
    bysort anon_id4: gegen date_incentive = max(date_incentive), replace

    * Keep the analysis variables and report missingness for them
    local sak_vars sak_round report_applied report_selected train_certif
    keep $keep_list `sak_vars' date_incentive
    mdesc $keep_list `sak_vars'

    * Merge in match data; only rows found in both files are kept
    * NOTE(review): this pulls match3_sus from the sakernas match file - confirm that is the intended variable
    fmerge m:1 anon_id4 sak_round using "$KP_deid_sakernas/Clean/age_gender_educ_match_ids.dta", ///
        assert(2 3) keep(3) nogen keepusing(match3_sus)

    * Park the sakernas stack while susenas is built
    tempfile sak_panel
    save `sak_panel'

* load susenas: stack the four admin-merged waves
    use "${sep20_m}", clear
    append using "${mar21_m}" "${sep21_m}" "${mar22_m}"

    * Give every row of a person that person's latest incentive date
    bysort anon_id4: gegen date_incentive = max(date_incentive), replace

    * Keep the analysis variables and report missingness for them
    local sus_vars sus_round get_pk hh_pk_win
    keep $keep_list `sus_vars' date_incentive
    mdesc $keep_list `sus_vars'

    * Merge in match data; only rows found in both files are kept
    fmerge m:1 anon_id4 sus_round using "$KP_deid_susenas/Clean/age_gender_educ_match_ids.dta", ///
        assert(2 3) keep(3) nogen keepusing(match3_sus)

* append datasets: susenas rows first, sakernas rows after
    append using `sak_panel'

******* Gen Variables ********

* Win Before Survey / Cash Before Survey
* Each spec lists: survey prefix, survey round, upper end of the batch range,
* and the incentive-date cutoff for that round.
*   win_before_survey  = ever_win_<upper batch> inside the round's batch range
*   cash_before_survey = 1 when, inside that range, date_incentive is on or before the cutoff
gen win_before_survey = .
gen cash_before_survey = 0
foreach spec in "sak 5 3 2020/7/31"  "sak 6 11 2021/1/31" "sak 7 17 2021/7/31" ///
                "sus 5 5 2020/8/30"  "sus 6 11 2021/2/28" "sus 7 18 2021/8/30" ///
                "sus 8 22 2022/2/28" {
    tokenize `spec'
    local in_window `1'_round == `2' & inrange(batch, 2, `3')
    replace win_before_survey  = ever_win_`3' if `in_window'
    replace cash_before_survey = 1 if `in_window' & date_incentive <= date("`4'", "YMD")
}

* Spread the cash indicator to every row of the same person and survey round
gegen cash_before_survey = max(cash_before_survey), ///
    by(anon_id4 sus_round sak_round) replace

* Report win SUS rename
* Susenas rounds 5 and 8 take the value from get_pk, rounds 6 and 7 from hh_pk_win
  replace report_selected = get_pk    if sus_round == 5 | sus_round == 8
  replace report_selected = hh_pk_win if sus_round == 6 | sus_round == 7

  *Education cat: three groups built from the education codes
  recode education (1/2 = 1) (3 = 2) (4/8 = 3), gen(ed_cat)

  *Age Cat: 0-30, 31-50, 51-100
  capture drop age_cat
  recode age (0/30 = 1) (31/50 = 2) (51/100 = 3), gen(age_cat)

  * Drop non-randomized batches
  drop if batch == 1 | batch == 15

* Initialize vars
* NOTE(review): these four placeholders are not filled or read anywhere in this file
    gen b_fs =.
    gen se_fs = .
    gen b_course =.
    gen se_course =.

* PMO first stage/course
* For every wave (and each stacked sample) three models are stored, indexed by i:
*   t<i>b  IV  of report_selected on cash_before_survey, instrumented by win_in_batch
*   t<i>d  OLS of win_before_survey on win_in_batch (first stage)
*   t<i>c  IV  of train_certif on cash_before_survey (sak waves only)
* Each stored model also carries control_mean (outcome mean in the comparison
* group, rounded to 3 decimals) and obs (a copy of e(N)).
  tempname ctrl_mean   // temporary scalar: cannot be shadowed by a variable name
  local i = 1
  foreach wave in sak_stack sak_aug20 sak_feb21 sak_aug21 sus_stack sus_sep20 sus_mar21 sus_sep21 sus_mar22 {

    * Fixed effects: stacked samples additionally interact every cell with the survey round
    if "`wave'" == "sus_stack" {
      global absorb absorb(c.stratum_win_prob#batch#sus_round gender#batch#sus_round urban#batch#sus_round ed_cat#batch#sus_round)
    }
    else if "`wave'" == "sak_stack" {
      global absorb absorb(c.stratum_win_prob#batch#sak_round gender#batch#sak_round urban#batch#sak_round ed_cat#batch#sak_round)
    }
    else {
      global absorb absorb(c.stratum_win_prob#batch gender#batch urban#batch ed_cat#batch)
    }

    * Sample condition for this wave. The parentheses are required: the stacked
    * restrictions are chains of |, and Stata evaluates & before |, so without
    * them "& cash_before_survey == 0" would only restrict the last round.
    local in_wave (${`wave'_precomma})

    * Report Selected IV
    qui sum report_selected if `in_wave' & cash_before_survey == 0
    scalar `ctrl_mean' = round(r(mean), 0.001)
    eststo t`i'b: ivreghdfe report_selected (cash_before_survey = win_in_batch)  if `in_wave', $absorb $cluster
    estadd scalar control_mean = `ctrl_mean'
    estadd scalar obs = e(N)

    * First Stage
    qui sum win_before_survey if `in_wave' & win_in_batch == 0
    scalar `ctrl_mean' = round(r(mean), 0.001)
    eststo t`i'd: reghdfe win_before_survey win_in_batch  if `in_wave', $absorb $vce
    estadd scalar control_mean = `ctrl_mean'
    estadd scalar obs = e(N)

    * Training Certificate (only estimated for the sak waves)
    if inlist("`wave'", "sak_stack", "sak_aug20", "sak_feb21", "sak_aug21") {
        qui sum train_certif if `in_wave' & cash_before_survey == 0
        scalar `ctrl_mean' = round(r(mean), 0.001)
        eststo t`i'c: ivreghdfe train_certif (cash_before_survey = win_in_batch)  if `in_wave', $absorb $cluster
        estadd scalar control_mean = `ctrl_mean'
        estadd scalar obs = e(N)
    }

    local ++i
  }

/*----------------------------------------------------*/
* Section:  Generate Table
/*----------------------------------------------------*/

*Labels
* esttab's label option prints variable labels, so the row names of the table come from here.
* NOTE(review): report_selected_t, total and report_selected_m are created only to carry
* labels; none of the three panels below keeps them.
gen report_selected_t =.
label var report_selected_t "Total Kartu Prakerja Winners (Survey)"
gen total =.
label var total "Total Kartu Prakerja Winners (PMO)"
gen report_selected_m =.
label var report_selected_m "Share Report Win"
label var cash_before_survey "Ever Win"
label var report_selected "Report Won Kartu Prakerja"
label var win_in_batch "Win In Batch"

* Make Table
* The table is written as three LaTeX fragments to one file:
* Panel A replaces the file, Panels B and C are appended to it.

* Output path, quoted at every use so a root folder containing spaces still works
  local table_file "$KP_output/tables/descriptive/tex/TableA15.tex"

* Options shared by all three panels
  local shared_opts b(%9.3f) se(%9.3f) collabels(none) booktabs label ///
    star(* 0.10 ** 0.05 *** 0.01) nolines ///
    stats(N control_mean, label("Observations" "Mean (Control)") fmt(%9.0fc 3)) ///
    fragment

*Win in PMO OLS (First Stage)
  esttab t1d t2d t3d t4d t5d t6d t7d t8d t9d using "`table_file'", ///
    mtitle("Stacked" "Aug '20" "Feb '21" "Aug '21" "Stacked" "Sep '20" "Mar '21" "Sep '21" "Mar '22" ) ///
    mgroups("Workforce Survey Wave" "Welfare Survey Wave", pattern(1 0 0 0 1 0 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
    keep(win_in_batch) ///
    prehead(`"\hline \hline"') ///
    prefoot(`"\\"') ///
    posthead(`"& (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\"' `"\hline \\ \multicolumn{7}{l}{\textit{Panel A: Win in administrative data}} \\\\[-2.5ex]"') ///
    nonum ///
    `shared_opts' ///
    replace

*Report Win IV
  esttab t1b t2b t3b t4b t5b t6b t7b t8b t9b using "`table_file'", ///
    keep(cash_before_survey) ///
    nomtitles nonumbers ///
    prefoot(`"\\"') ///
    posthead("\hline \\ \multicolumn{7}{l}{\textit{Panel B: Report selected in survey data}} \\\\[-2.5ex]") ///
    `shared_opts' ///
    append

*Take Course IV (only the four workforce-survey columns have this model)
  esttab t1c t2c t3c t4c using "`table_file'", ///
    keep(cash_before_survey) ///
    nomtitles nonumbers ///
    prefoot(`"\\"') ///
    posthead("\hline \\ \multicolumn{7}{l}{\textit{Panel C: Ever Received Training Certificate}} \\\\[-2.5ex]") ///
    postfoot(`"Batches & 2-14, 16-17 & 2-3 & 2-11 & 2-14, 16-17 & 2-14, 16-22 & 2-5 & 2-11 & 2-14, 16-18 & 2-14, 16-22 \\"' `"\hline"') ///
    `shared_opts' ///
    append

* Close the log opened in the Setup section
  cap log close

// END